如何解析trtexec输出的模型推理结果数据


./trtexec --onnx=efficientdet-d0-s.onnx --loadInputs='data':o4_clip-1_raw_data.bin --saveEngine=efficientdet-d0-s.engine --exportOutput=trtexec-result.json


[ { "name" : "regression" , "dimensions" : "1x49104x4" , "values" : [ 5.78506, 6.88422, -1.56519, -17.5148, -0.113892, 0.545003, -1.52597, -0.767865, 1.00629, 0.163376, 0.0242282, -1.89316, 0.187985, 0.499627, -0.414611, -2.14051, 0.371698, 0.478594, -0.0126426, 2.01423, 0.997112, 0.517545, 1.88847, -0.707338, 0.157562, 0.0627687, 0.10975, -0.430063, 0.537361, 0.670655, 0.428142, ...]}, { "name" : "anchors" , "dimensions" : "1x49104x4" , "values" : [ -12, -12, 20, 20, -7.2, -18.4, 15.2, 26.4, -18.4, -7.2, 26.4, 15.2, -16.1587, -16.1587, 24.1587, 24.1587, -10.1111, -24.2222, 18.1111, 32.2222, -24.2222, -10.1111, 32.2222, 18.1111, -21.3984, -21.3984, 29.3984, 29.3984, -13.7789, -31.5578, 21.7789, 39.5578, -31.5578, -13.7789, 39.5578, 21.7789, -12, ...]}, { "name" : "classification" , "dimensions" : "1x49104x1" , "values" : [ 0.000323705, 3.062e-07, 0.00457684, 0.000632869, 0.0004986, 0.000207652, 0.000125256, 6.19738e-07, 0.0203817, 8.39792e-06, 8.40121e-09, 9.50497e-05, 9.16859e-06, 2.48826e-05, 1.39859e-06, 3.46441e-06, 2.93581e-08, 0.000207522, 6.74278e-06, 1.8361e-08, 3.44744e-05, 3.35026e-06, 2.89377e-05, 3.85066e-07, 1.02452e-05,...]} ]

如何把这份结果解析出来,过滤处理成最终的检测结果,并在图上画出识别到的bbox呢?这和一般模型的后处理(post process)类似:需要结合网络本身的设计,对输出数据做后处理,得到合理的bbox以及对应的class和score。针对我这个修改过的efficientdet导出的onnx,用于解析trtexec推理所得json结果数据的相关代码如下:

#encoding: utf-8 import numpy as np import os import cv2 import json classes = ['baggage'] def nms(bboxes, classifictaion, thresh): x1 = bboxes[:, 0] y1 = bboxes[:, 1] x2 = bboxes[:, 2] y2 = bboxes[:, 3] scores = classifictaion[:, 1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] keep = [] while order.size > 0: i = order[0] keep.append(i) xx1 = np.maximum(x1[i], x1[order[1:]]) yy1 = np.maximum(y1[i], y1[order[1:]]) xx2 = np.minimum(x2[i], x2[order[1:]]) yy2 = np.minimum(y2[i], y2[order[1:]]) w = np.maximum(0.0, xx2 - xx1 + 1) h = np.maximum(0.0, yy2 - yy1 + 1) inter = w * h ovr = inter / (areas[i] + areas[order[1:]] - inter) inds = np.where(ovr threshold: box_c += 1 x1 = int(bboxes[i][0]) y1 = int(bboxes[i][1]) x2 = int(bboxes[i][2]) y2 = int(bboxes[i][3]) cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2) cv2.putText(img, '{} {:.3f}'.format(obj, score), (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2) cv2.imwrite(output_img,img) #cv2.imshow("result",img) #cv2.waitKey(100) #print("draw {} boxes".format(box_c)) if __name__ == "__main__": json_file = "/usr/src/tensorrt/bin/trtexec-result.json" input_img = "/usr/src/tensorrt/bin/o4_clip-1.jpg" output_img = "/usr/src/tensorrt/bin/o4_clip-1-TensorRT_result.jpg" num_class = 1 parse_result(num_class,json_file,input_img,output_img)


""" Simple Inference Script of EfficientDet-Pytorch """ import json import time import onnxruntime import torch from torch.backends import cudnn from backbone import EfficientDetBackbone import cv2 import numpy as np from efficientdet.utils import BBoxTransform, ClipBoxes from utils.utils import preprocess, invert_affine, postprocess def to_numpy(tensor): return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() class MyEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, np.integer): return int(obj) elif isinstance(obj, np.floating): return float(obj) elif isinstance(obj, np.ndarray): return obj.tolist() else: return super(MyEncoder, self).default(obj) def to_list(arr,r,c): data_list = [] for i in range(r): for j in range(c): data_list.append(arr[i][j]) return data_list compound_coef = 0 force_input_size = None # set None to use default size #img_path = 'test/img.png' img_path = 'o4_clip-1.png' threshold = 0.1 iou_threshold = 0.1 use_cuda = True use_float16 = False cudnn.fastest = True cudnn.benchmark = True obj_list = ['baggage'] input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536] input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size) if use_cuda: x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0) else: x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0) x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2) ''' model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list)) #model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth')) model.load_state_dict(torch.load(f'logs/airport/efficientdet-d0_499_79500.pth')) model.requires_grad_(False) model.eval() if use_cuda: model = model.cuda() if use_float16: model = model.half() ''' ort_session = 
onnxruntime.InferenceSession("convert/efficientdet-d0.onnx") with torch.no_grad(): ''' #features, regression, classification, anchors = model(x) regression, classification, anchors = model(x) ''' nx = to_numpy(x) #np.save("raw_data_numpy.npy",nx) nx.tofile("raw_data.bin") ort_inputs = {ort_session.get_inputs()[0].name: nx} ort_outs = ort_session.run(None, ort_inputs) #shapes: (1, 49104, 4) (1, 49104, 1) (1, 49104, 4) #print("shapes:",ort_outs[0].shape,ort_outs[1].shape,ort_outs[2].shape) r = 49104 results = [] dict_reg = {} dict_reg["name"] = "regression" dict_reg["dimensions"] = "1x49104x4" dict_reg["values"] = to_list(ort_outs[0][0],r,4) results.append(dict_reg) dict_cls = {} dict_cls["name"] = "classification" dict_cls["dimensions"] = "1x49104x1" dict_cls["values"] = to_list(ort_outs[1][0],r,1) results.append(dict_cls) dict_ach = {} dict_ach["name"] = "anchors" dict_ach["dimensions"] = "1x49104x4" dict_ach["values"] = to_list(ort_outs[2][0],r,4) results.append(dict_ach) f = open("onnxruntime_result.json","w") json.dump(results,f,cls=MyEncoder,indent=2) f.close() regression = torch.from_numpy(ort_outs[0]) classification = torch.from_numpy(ort_outs[1]) anchors = torch.from_numpy(ort_outs[2]) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) def display(preds, imgs, imshow=True, imwrite=False): for i in range(len(imgs)): if len(preds[i]['rois']) == 0: continue for j in range(len(preds[i]['rois'])): (x1, y1, x2, y2) = preds[i]['rois'][j].astype(np.int) cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2) obj = obj_list[preds[i]['class_ids'][j]] score = float(preds[i]['scores'][j]) if score >= threshold: cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score), (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1) if imshow: cv2.imshow('img', imgs[i]) cv2.waitKey(0) if imwrite: cv2.imwrite(f'o4_clip-1-OnnxRuntime_result.jpg', imgs[i]) out = 
invert_affine(framed_metas, out) display(out, ori_imgs, imshow=False, imwrite=True)







